* ---------------------------------------------------------------------
* Session setup: wipe memory, move to the data folder, and define the
* folder that receives the exported CSV files.
* ---------------------------------------------------------------------
clear all
cd "~/Data/Open_Close_Auctions"
global output `"~/Results"'

* Load the auction data and keep calendar years 2012 and later.
use auction_price_impact2021.dta, clear
keep if year(date) >= 2012

* Bring in the permno-date sample file. Only permno-date pairs found in
* both files are retained, and no _merge indicator is left behind.
merge 1:1 permno date using merged_permno_date_from_step2_sample_noAmex.dta, keep(match) nogenerate

* Declare the data as a permno x date panel.
sort permno date
xtset permno date


* Closing-auction order imbalance, scaled by adv_10d
* (presumably 10-day average daily volume -- confirm against the data build).
gen pct_oi = close_imb_qty / adv_10d

* Sign the imbalance: positive for side "B", negative for side "S".
* Any other side, or a missing ratio, is recorded as a zero imbalance.
gen signed_pct_oi1 = cond(close_imb_side == "B", pct_oi, cond(close_imb_side == "S", -pct_oi, 0))
replace signed_pct_oi1 = 0 if missing(signed_pct_oi1)

* 10% threshold: values outside [-0.1, 0.1] are set to missing
* (i.e. trimmed out of the sample, not clipped to the bound).
replace signed_pct_oi1 = . if abs(signed_pct_oi1) > 0.1

gen closing_auction_oib = signed_pct_oi1


* Opening-auction order imbalance scaled by adv_10d and signed by side:
* positive for "B", negative for "S". Any other side, or a missing ratio,
* is recorded as a zero imbalance.
gen signed_pct_open_oi = cond(imbalance_side == "B", imbalance/adv_10d, cond(imbalance_side == "S", -imbalance/adv_10d, 0))
replace signed_pct_open_oi = 0 if missing(signed_pct_open_oi)

* Same 10% threshold as the closing auction: values outside [-0.1, 0.1]
* are set to missing.
replace signed_pct_open_oi = . if abs(signed_pct_open_oi) > 0.1

gen open_auction_oib = signed_pct_open_oi

* Flag stock-days with an opening auction: ocount is 1 when
* imbalance + paired is non-missing and non-zero, and missing otherwise.
gen openinterest_count = imbalance + paired
gen ocount = 1 if openinterest_count != 0 & !missing(openinterest_count)


 
* Size groups from lagged market equity (lag1_me) against the two cutoffs:
*   1 = lag1_me >= me_cutoff50
*   2 = me_cutoff20 <= lag1_me < me_cutoff50
*   3 = lag1_me < me_cutoff20
* The first condition uses >= so that a stock sitting exactly on the 50%
* cutoff is assigned to group 1. With a strict > it matched neither group 1
* nor group 2 and was left without a group (and later dropped from the
* exported portfolios). This mirrors the >= already used at the 20% cutoff.
* NOTE(review): a missing me_cutoff50 / me_cutoff20 compares as larger than
* any number, so such rows fall through to group 2 or 3 -- confirm that the
* cutoffs are never missing in the merged sample.
gen subsample_group = 1 if !missing(lag1_me) & lag1_me >= me_cutoff50
replace subsample_group = 2 if missing(subsample_group) & !missing(lag1_me) & lag1_me < me_cutoff50 & lag1_me >= me_cutoff20
replace subsample_group = 3 if missing(subsample_group) & !missing(lag1_me) & lag1_me < me_cutoff20

* 0/1 indicators for group 2 (DummySmall) and group 1 (DummyLarge);
* both are 0 for group 3 and for rows with no group.
gen DummySmall = (subsample_group == 2)
gen DummyLarge = (subsample_group == 1)

* Closing-auction return: |prc| relative to price1545, in decimal units.
* A missing prc or price1545 propagates to a missing return.
gen ret_closing_auction = (abs(prc) - price1545) / price1545
* Discard returns larger than 10% in absolute value.
replace ret_closing_auction = . if abs(ret_closing_auction) > 0.10

* Opening-auction return: oprc relative to the midpoint of before928bid and
* before928ask, written as (2*open - (ask+bid)) / (ask+bid). Decimal units.
* A missing input propagates to a missing return.
gen ret_open0928 = (2*oprc - (before928ask + before928bid)) / (before928ask + before928bid)
* Discard returns larger than 10% in absolute value.
replace ret_open0928 = . if abs(ret_open0928) > 0.10

* Require an opening auction on the same stock-day: when ocount is not 1
* the closing return is blanked so the observation cannot enter the
* closing-auction estimates.
replace ret_closing_auction = . if ocount != 1

* sp500 is 1 when sp500_flag is non-missing and 0 otherwise.
gen sp500 = !missing(sp500_flag)


* Lagged volatility control.
* vol22d_lag1 is the previous observation's vol_22d within each permno
* (previous row in date order, not previous calendar day).
bysort permno (date): gen vol22d_lag1 = vol_22d[_n-1]

* Scale by sqrt(252) and bound the result to [0.01, 2].
gen vol_22d_ann = vol22d_lag1*sqrt(252)
* Stata treats missing as larger than every number, so the upper bound must
* exclude missing explicitly. Without the guard, the first observation of
* every permno (whose lag is missing) would be assigned a volatility of 2.
replace vol_22d_ann = 2 if vol_22d_ann > 2 & !missing(vol_22d_ann)
replace vol_22d_ann = 0.01 if vol_22d_ann < 0.01
* change to lag(Volat) instead of lag(LogVolat)
*gen logvol22d_ann=log(vol_22d_ann)


* Price control: price_measure = -log(price_lag1), with price_lag1 taken in
* absolute value and bounded to [0.5, 300]. price_lag1 is modified in place.
replace price_lag1 = abs(price_lag1)
replace price_lag1 = 0.5 if price_lag1 < 0.5
* Stata treats missing as larger than every number, so the upper bound must
* exclude missing explicitly. Without the guard, a missing lagged price would
* be replaced by 300 and yield a non-missing price_measure.
replace price_lag1 = 300 if price_lag1 > 300 & !missing(price_lag1)
gen price_measure = -1*log(price_lag1)


* Exchange indicator: 1 when exchcd is 3, 0 when exchcd is 1,
* missing for any other exchange code.
gen nasd = cond(exchcd == 3, 1, cond(exchcd == 1, 0, .))

* Day of week from Stata's dow() (0 = Sunday, ..., 6 = Saturday).
gen dow = dow(date)

* EA_dummy1 is 0 when ea_dummy is missing and 1 for any non-missing value.
* NOTE(review): a non-missing ea_dummy equal to 0 also maps to 1 -- confirm
* that ea_dummy is only populated on the relevant days.
gen EA_dummy1 = !missing(ea_dummy)

* Friday indicator (dow() returns 5 for Friday).
gen Friday = (dow == 5)


* ---------------------------------------------------------------------
* Closing auction: 100 imbalance-percentile portfolios per
* exchange (nasd) x size group (subsample_group), pooled over all days.
* Runs inside preserve/restore so the stock-day panel is left untouched.
* ---------------------------------------------------------------------
preserve

* Keep only days with more than 1000 non-missing closing-auction returns.
bysort date: egen N_date = count(ret_closing_auction)
drop if N_date <= 1000

* Modification in December 2024: percentiles of the imbalance are computed
* over the whole pooled sample within each nasd x subsample_group cell
* (gquantiles is provided by the gtools package).
gquantiles ptile=closing_auction_oib, xtile nq(100) by(nasd subsample_group)

* Cell-level averages of the imbalance and of the closing-auction return.
bysort nasd subsample_group ptile: egen closing_auction_oib_100 = mean(closing_auction_oib)
bysort nasd subsample_group ptile: egen ret_closing_auction_100 = mean(ret_closing_auction)

* Earlier per-date variant, kept for reference:
*collapse (mean) closing_auction_oib_100=closing_auction_oib_100 ret_closing_auction_100=ret_closing_auction,  by(nasd subsample_group date ptile)

* One row per nasd x subsample_group x ptile. The return column is averaged
* from the raw ret_closing_auction values.
collapse (mean) closing_auction_oib_100=closing_auction_oib_100 ret_closing_auction_100=ret_closing_auction, by(nasd subsample_group ptile)

* Remove cells without a size group or without a percentile.
drop if missing(subsample_group) | missing(ptile)

* export to csv file for plotting in each of the 6 subsamples : NYSE/NASDAQ  x  Large/Small/Micro
export delimited using "$output/b_100portfolios_pooled.csv", replace

restore



***************************************
* now for open auction
****************************************

* ---------------------------------------------------------------------
* Opening auction: 100 imbalance-percentile portfolios per
* exchange (nasd) x size group (subsample_group), pooled over all days.
* Runs inside preserve/restore so the stock-day panel is left untouched.
* ---------------------------------------------------------------------
preserve

* Keep only days with more than 1000 non-missing opening-auction returns.
bysort date: egen N_date = count(ret_open0928)
drop if N_date <= 1000

* Modification in December 2024: percentiles of the imbalance are computed
* over the whole pooled sample within each nasd x subsample_group cell
* (gquantiles is provided by the gtools package).
gquantiles ptile=open_auction_oib, xtile nq(100) by( nasd subsample_group)

* Cell-level averages of the imbalance and of the opening-auction return.
bysort nasd subsample_group ptile: egen open_auction_oib_100 = mean(open_auction_oib)
bysort nasd subsample_group ptile: egen ret_open0928_100 = mean(ret_open0928)

* One row per nasd x subsample_group x ptile.
collapse (mean) open_auction_oib_100=open_auction_oib_100 ret_open0928_100=ret_open0928_100, by(nasd subsample_group ptile)

* Remove cells without a size group or without a percentile, then export.
drop if missing(subsample_group) | missing(ptile)
export delimited using "$output/b_100portfolios_pooled_open.csv", replace

restore